---
title: "R code for generating tables and figures for Residential mobility and persistently depressed voting among disadvantaged adults in a large housing experiment"
authors: David Knight and Baobao Zhang
output: html_document
---

# Load packages and data

```{r setup, include=FALSE}
# Load libraries
library(tidyverse)
library(readxl)
library(ggplot2)

# Create a file directory
my_file_directory <- ""

# Write a function to load in the data
# Disclosure Excel #0
output_sheets <- length(excel_sheets(paste0(my_file_directory, "data/output_disclosure_0.xlsx"))) - 2
# Disclosure Excel #1
output_sheets_p <- length(excel_sheets(paste0(my_file_directory, "data/output_disclosure_1.xlsx"))) - 2
# Disclosure Excel #2
output_sheets_d <- length(excel_sheets(paste0(my_file_directory, "data/output_disclosure_2.xlsx"))) - 2
# Disclosure Excel #3
output_sheets_rr <- length(excel_sheets(paste0(my_file_directory, "data/output_disclosure_3.xlsx")))


# Function to load Excel #0
mto_load <- function(sheet_number) {
  df <- read_excel(paste0(my_file_directory, "data/output_disclosure_0.xlsx"), sheet = sheet_number)
  df_c <- df %>% 
  fill(output_file_name, disclosure_sample_file_name, sample_name,
       outcome_variable, outcome_label, subgroup, baseline_covariates,
       N, N_clusters, .direction = "down") 
  return(df_c)
}

# Function to add p-values to the the first disclosure Excel that was missing them: Excel #1
mto_load_p_values <- function(sheet_number) {
  df <- read_excel(paste0(my_file_directory, "data/output_disclosure_1.xlsx"), sheet = sheet_number)
  df_c <- df %>% 
  fill(output_file_name, disclosure_sample_file_name, sample_name,
       outcome_variable, outcome_label, subgroup, baseline_covariates, p_values,
       .direction = "down") 
  return(df_c)
}

# Function to load Excel #2
mto_load_demo <- function(sheet_number) {
  df <- read_excel(paste0(my_file_directory, "data/output_disclosure_2.xlsx"), sheet = sheet_number)
  df_c <- df %>% 
  fill(output_file_name, disclosure_sample_file_name, sample_name, subgroup,
       outcome_variable, outcome_label, subgroup, baseline_covariates, p_values,
       .direction = "down") 
  return(df_c)
}

# Function to load Excel #3
mto_load_rr <- function(sheet_number) {
  df <- read_excel(paste0(my_file_directory, "data/output_disclosure_3.xlsx"), sheet = sheet_number)
  df_c <- df %>% 
  fill(output_file_name, disclosure_sample_file_name, sample_name, subgroup,
       outcome_variable, outcome_label, subgroup, baseline_covariates, p_values,
       .direction = "down") 
  return(df_c)
}

# Trailing zeros rounding function; default is 2 decimal points
roundfunc <- function(x,
                      round_digits = 2,
                      lessthan = TRUE) {
  if (lessthan) {
    temp <- ifelse(x > 0 & round(x, round_digits) == 0,
                   paste0("<0.", rep(0, (round_digits - 1)), 1),
                   sprintf(paste0("%.", round_digits, "f"), round(x, round_digits)))
    temp <- ifelse(x < 0 & round(x, round_digits) == 0,
                   paste0(">-0.", rep(0, (round_digits - 1)), 1),
                   temp)
    temp[x == 0] <- 0
    return(temp)
  } else {
    return(sprintf(paste0("%.", round_digits, "f"), round(x, round_digits)))
  }
}

# Turn the first set of disclosure Excel results into a data frame
df_list <- lapply(mto_load, X = 1:output_sheets)
df <- do.call(rbind, df_list)

# Re-label outcome variable
df$outcome_label <- gsub(pattern = "Proportion of general elections voted in",
                            replacement = "Proportion of federal elections voted in",
                            x = df$outcome_label)

df$outcome_label <- gsub(pattern = "General Elections",
                            replacement = "Federal Election",
                         ignore.case = FALSE,
                            x = df$outcome_label)

# Save the previous disclosure stuff as df_old
df_old <- df

# Read in the new p-value data
df_p <- lapply(mto_load_p_values, X = 1:output_sheets_p)
df_p <- do.call(plyr::rbind.fill, df_p)

# Relabel outcomes so the p-values can be merged
df_p$outcome_label <- gsub(pattern = "Proportion of presidential and midterm elections voted in",
                            replacement = "Proportion of federal elections voted in",
                            x = df_p$outcome_label)
# Clean up labels
df_p$outcome_label <- gsub(pattern = "General Elections",
                            replacement = "Federal Election",
                         ignore.case = FALSE,
                            x = df_p$outcome_label)

# Old Excel sheets without the coefficients
df_p_old <- df_p[is.na(df_p$coefficients),]
# New Excel sheets with the coefficients 
df_p_new <- df_p[!is.na(df_p$coefficients),]

# Delete previously disclosed variables in the old Excel sheets
df_p_old$coefficients <- NULL
df_p_old$standard_errors <- NULL
df_p_old$N <- NULL
df_p_old$N_clusters <- NULL

# Add the p-values to disclosure Excel #1
df <- merge(x = df, y = df_p_old, all.x = TRUE,
             by = c("output_file_name",
                    "disclosure_sample_file_name",
                    "sample_name",
                    "outcome_variable",
                    "outcome_label",
                    "subgroup",
                    "baseline_covariates",
                    "variable_name",
                    "analysis_type"))

# Load in the demographic subgroup analysis dataset (disclosure Excel #2)
df_d <- lapply(mto_load_rr, X = 1:output_sheets_d)
df_d <- do.call(rbind, df_d)

# Load in the revise and resubmit dataset disclosure Excel #3)
df_rr <- lapply(mto_load_rr, X = 1:output_sheets_rr)
df_rr <- do.call(rbind, df_rr)

```

# Main analysis

```{r main-analysis, include=FALSE}

# Specify the 2 main outcomes
main_outcome <- c("In voter file", "Proportion of federal elections voted in")

# Create more descriptive label
df$outcome_variable_c <- df$outcome_label
df$outcome_variable_c[df$outcome_label == "In voter file"] <-
  "Percentage in voter file"
df$outcome_variable_c[df$outcome_label == "Proportion of federal elections voted in"] <-
  "Percentage of federal elections voted in"

# Clean up the experimental groups
df$group_name_c <- df$variable_name
df$group_name_c[df$variable_name %in% c("Control mean", "exp_groupExp = exp_groupSec8")] <- NA
df$group_name_c[df$group_name_c == "Sec8"] <- "Section 8"
df$group_name_c[df$group_name_c == "Exp"] <- "Experimental voucher"
df$group_name_c <- factor(df$group_name_c, levels = c("Experimental voucher",
                                                      "Section 8",
                                                      "Control"))

# Figure 1. Marginal means of the two main outcomes, by experimental group and age group at time of randomization
ggplot(df[df$outcome_label %in% main_outcome & df$baseline_covariates == "FALSE" &
            df$analysis_type == "Marginal means: from ITT regression",]) +
  geom_pointrange(aes(x = group_name_c, y = coefficients,
                      ymin = coefficients+qnorm(0.025)*standard_errors,
                      ymax = coefficients+qnorm(0.975)*standard_errors, 
                     color = group_name_c,
                     shape = group_name_c)) +
  facet_grid(subgroup~outcome_variable_c, scales="free") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  xlab("Experimental groups") +
  ylab("Percentage") + 
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

# Save the graphics
# PDF
ggsave(paste0(my_file_directory, "figures/main_coef_plot.pdf"),
       width = 7, height = 4, dpi = 300)
# PNG
ggsave(paste0(my_file_directory, "main_coef_plot.png"),
       width = 7, height = 4, dpi = 300)

# Make the voter files plot
ggplot(df[df$outcome_label == "In voter file" & df$baseline_covariates == "FALSE" &
            df$analysis_type == "Marginal means: from ITT regression",]) +
  geom_pointrange(aes(x = group_name_c, y = coefficients,
                      ymin = coefficients+qnorm(0.025)*standard_errors,
                      ymax = coefficients+qnorm(0.975)*standard_errors, 
                     color = group_name_c)) +
  geom_text(aes(x = group_name_c, y = coefficients, label = roundfunc(coefficients*100, 1),
                color = group_name_c), nudge_x = 0.3) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  xlab("Experimental groups") +
  ylab("Percentage") + 
  facet_grid(subgroup~., scales="free") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

# Save the graphics
# PDF
ggsave(paste0(my_file_directory, "figures/coef_voter_files.pdf"),
       width = 7, height = 5.5, dpi = 300)
# PNG
ggsave(paste0(my_file_directory, "figures/coef_voter_files.png"),
       width = 7, height = 5.5, dpi = 300)

# Figure: Proportion of federal elections voted in
ggplot(df[df$outcome_label == "Proportion of federal elections voted in" & df$baseline_covariates == "FALSE" &
            df$analysis_type == "Marginal means: from ITT regression",]) +
  geom_pointrange(aes(x = group_name_c, y = coefficients,
                      ymin = coefficients+qnorm(0.025)*standard_errors,
                      ymax = coefficients+qnorm(0.975)*standard_errors, 
                     color = group_name_c,
                     shape = group_name_c)) +
  geom_text(aes(x = group_name_c, y = coefficients, label = roundfunc(coefficients*100, 1),
                color = group_name_c), nudge_x = 0.3) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  xlab("Experimental groups") +
  ylab("Percentage") + 
  facet_grid(subgroup~., scales="free") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

# Save the graphics
# PDF
ggsave(paste0(my_file_directory, "figures/coef_federal_elections.pdf"),
       width = 7, height = 5.5, dpi = 300)
# PNG
ggsave(paste0(my_file_directory, "figures/coef_federal_elections.png"),
       width = 7, height = 5.5, dpi = 300)

# Clean up the experimental groups
df_p_new$group_name_c <- df_p_new$variable_name
df_p_new$group_name_c[df_p_new$variable_name %in% c("Control mean", "exp_groupExp = exp_groupSec8")] <- NA
df_p_new$group_name_c[df_p_new$group_name_c == "Sec8"] <- "Section 8"
df_p_new$group_name_c[df_p_new$group_name_c == "Exp"] <- "Experimental voucher"
df_p_new$group_name_c <- factor(df_p_new$group_name_c, levels = c("Experimental voucher",
                                                      "Section 8",
                                                      "Control"))

df_p_new$outcome_label_c <- gsub(pattern = "Proportion", replacement = "Percentage", x = df_p_new$outcome_label)

# Fig S2: Marginal means of voting in midterm elections and presidential elections, by experimental group and age group at time of randomization
ggplot(df_p_new[df_p_new$baseline_covariates == "FALSE" &
            df_p_new$analysis_type == "Marginal means: from ITT regression",]) +
  geom_pointrange(aes(x = group_name_c, y = coefficients,
                      ymin = coefficients+qnorm(0.025)*standard_errors,
                      ymax = coefficients+qnorm(0.975)*standard_errors, 
                     color = group_name_c,
                     shape = group_name_c)) +
  geom_text(aes(x = group_name_c, y = coefficients, label = roundfunc(coefficients*100, 1),
                color = group_name_c), nudge_x = 0.3, size = 3) +
  facet_grid(subgroup~outcome_label_c, scales="free") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  xlab("Experimental groups") +
  ylab("Percentage") + 
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

# Save the graphics
# PDF
ggsave(paste0(my_file_directory, "figures/by_midterm_prez_coef_plot.pdf"),
       width = 7, height = 5, dpi = 300)
# PNG
ggsave(paste0(my_file_directory, "figures/by_midterm_prez_coef_plot.png"),
       width = 7, height = 5, dpi = 300)

```

# Voter participation by election year

```{r by-election}

# Clean up the label for the election years
by_election <- df$outcome_label[grep(x = df$outcome_label, pattern = "Voted: ")]
df$outcome_label_election <- df$outcome_label
df$outcome_label_election[!df$outcome_label %in% by_election] <- NA
df$outcome_label_election <- gsub(pattern = "Voted: Federal Election ",
                                  replacement = "",
                                  x = df$outcome_label_election)

# Figure 2. Marginal means of turnout for each presidential election year, by experimental group and age group at time of randomization. 
ggplot(df[df$outcome_label %in% by_election & df$baseline_covariates == "FALSE" &
            df$analysis_type == "Marginal means: from ITT regression",]) +
  geom_pointrange(aes(x = group_name_c, y = coefficients,
                      ymin = coefficients+qnorm(0.025)*standard_errors,
                      ymax = coefficients+qnorm(0.975)*standard_errors, 
                     color = group_name_c,
                     shape = group_name_c)) +
  facet_grid(outcome_label_election~subgroup, scales="free") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  xlab("Experimental groups") +
  ylab("Percentage voted in presidential elections") + 
  coord_flip() +
  theme_bw() +
  theme(legend.position = "none")

# Save the graphs
# PDF
ggsave(paste0(my_file_directory, "figures/by_election_coef_plot.pdf"),
       width = 7, height = 5, dpi = 300)
# PNG
ggsave(paste0(my_file_directory, "figures/by_election_coef_plot.png"),
       width = 7, height = 5, dpi = 300)

```

# Making tables for the manuscript

```{r table-hochberg}
# Apply the Benjamini-Hochberg procedure
df$p_value_calcs <- df$analysis_type %in% c("ITT estimates", "TOT estimates",
                                            "Linear hypothesis: from ITT regression") &
  df$variable_name != "Control mean"

# Apply procedure within each age group
df$hochberg_sig <- NA
df$hochberg_sig[df$p_value_calcs & df$subgroup == "Adult"] <- 
  p.adjust(p = df$p_values[df$p_value_calcs & df$subgroup == "Adult"], method = "BH")
df$hochberg_sig[df$p_value_calcs & df$subgroup == "Age < 13"] <- 
  p.adjust(p = df$p_values[df$p_value_calcs & df$subgroup == "Age < 13"], method = "BH")
df$hochberg_sig[df$p_value_calcs & df$subgroup == "Age 13-18"] <- 
  p.adjust(p = df$p_values[df$p_value_calcs & df$subgroup == "Age 13-18"], method = "BH")

# Generate significance stars
df$hochberg_sig_star <- ""
df$hochberg_sig_star[df$hochberg_sig < 0.05] <- "*"
df$hochberg_sig_star[df$hochberg_sig < 0.01] <- "**"
df$hochberg_sig_star[df$hochberg_sig < 0.001] <- "**"
# Add stars to the outputs
df$coef_se <- paste0(roundfunc(df$coefficients, 3), " (",
                     roundfunc(df$standard_errors, 3), ")", df$hochberg_sig_star)
# Fix label
df_prop <- df[df$outcome_label == "Proportion of federal elections voted in",]

```


```{r make-tables-1}
# Intention-to-treat (ITT) estimates: main outcomes
t_itt <- df[df$outcome_label %in% main_outcome & df$analysis_type == "ITT estimates",]
t_itt$variable_name[t_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
t_itt$variable_name[t_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"
table(t_itt$variable_name)
t_itt_nc <- t_itt[t_itt$baseline_covariates == "FALSE",]
table(t_itt_nc$variable_name)
t_itt_wc <- t_itt[t_itt$baseline_covariates == "TRUE",]

# Treatment-on-the-treated (TOT) main outcomes
t_tot <- df[df$outcome_label %in% main_outcome & df$analysis_type == "TOT estimates",]
t_tot$variable_name[t_tot$variable_name == "Exp"] <- "TOT estimate: Experimental voucher"
t_tot$variable_name[t_tot$variable_name == "Sec8"] <- "TOT estimate: Section 8"
t_tot_nc <- t_tot[t_tot$baseline_covariates == "FALSE",]
t_tot_wc <- t_tot[t_tot$baseline_covariates == "TRUE",]

# Variable names 
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")

# Make a table to write the outputs
output_tables <- function(data_frame_input, file_name) {
  df <- data_frame_input
  names(df) <- c("Outcome", "Age group at randomization",
                 "Variable", "Coefficient (SE)", "N individuals",
                 "N households")
  write.csv(df, file = paste0(my_file_directory, "tables/", file_name, ".csv"),
          row.names = FALSE, na = "")

}

# Table 1. Intention-to-treat estimates by age group at the age of randomization (without pre-treatment covariates): main outcomes.
# Main outcome: ITT, no covariates
output_tables(t_itt_nc[,var_include], file_name = "main_outcomes_itt_nc")

# Table 2. Treatment-on-the-treated estimates by age group at the age of randomization (without pre-treatment covariates): main outcomes. 
# Main outcome: TOT, no covariates
output_tables(t_tot_nc[,var_include], file_name = "main_outcomes_tot_nc")

# Table S2: Intention-to-treat estimates by age group at the age of randomization (with baseline covariates): key outcomes. 
# Main outcome: ITT, with covariates
output_tables(t_itt_wc[,var_include], file_name = "main_outcomes_itt_wc")

# Table S3. Treatment-on-the-treated estimates by age group at the age of randomization (with baseline covariates): key outcomes.
# Main outcome: TOT, with covariates
output_tables(t_tot_wc[,var_include], file_name = "main_outcomes_tot_wc")

```

# Linear hypothesis testing for difference between the two experimental groups

```{r linear-hypothesis}
# Table S4: Linear hypothesis testing shows no difference between the ITT estimates of the Experimental vouchers and the Section 8 vouchers for the main outcomes.

# Testing differences between the two treatment groups (Experimental vs Section 8)
t_lh <- df[df$outcome_label %in% main_outcome & df$analysis_type == "Linear hypothesis: from ITT regression",]

# Variable names 
var_include <- c("outcome_label", "subgroup", "baseline_covariates",
                 "variable_name",
                 "coef_se", "N", "N_clusters")

# Difference between the Experimental results and Section 8 results 
output_tables(t_lh[,var_include], file_name = "linear_hypothesis")
```

# Check if PIK duplicated or missing is affected by experimental condition

```{r check-missing}
# Table S1. The proportion of missing or duplicated Protected Identification Key (PIK) is not statistically different across the three experimental groups. 

df_pik <- df[df$outcome_label == "PIK missing",]
df_pik <- df_pik[df_pik$analysis_type == "ITT estimates",]
df_pik$outcome_label <- "PIK missing or duplicated"
df_pik$variable_name[df_pik$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_pik$variable_name[df_pik$variable_name == "Sec8"] <- "ITT estimate: Section 8"
output_tables(df_pik[,var_include], file_name = "missing_duplicated")
```

# Voter participation by presidential election year

```{r pres-year}
# Clean up labels
t_itt_yr <- df[!is.na(df$outcome_label_election) &
                 df$analysis_type == "ITT estimates",]
t_itt_yr$variable_name[t_itt_yr$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
t_itt_yr$variable_name[t_itt_yr$variable_name == "Sec8"] <- "ITT estimate: Section 8"
table(t_itt_yr$variable_name)
t_itt_yr_nc <- t_itt_yr[t_itt_yr$baseline_covariates == "FALSE",]
table(t_itt_yr_nc$variable_name)
t_itt_yr_wc <- t_itt_yr[t_itt_yr$baseline_covariates == "TRUE",]

# Clean up labels
t_tot_yr <- df[!is.na(df$outcome_label_election) & df$analysis_type == "TOT estimates",]
t_tot_yr$variable_name[t_tot_yr$variable_name == "Exp"] <- "TOT estimate: Experimental voucher"
t_tot_yr$variable_name[t_tot_yr$variable_name == "Sec8"] <- "TOT estimate: Section 8"
t_tot_yr_nc <- t_tot_yr[t_tot_yr$baseline_covariates == "FALSE",]
t_tot_yr_wc <- t_tot_yr[t_tot_yr$baseline_covariates == "TRUE",]

# Variable names 
var_include <- c("outcome_variable_c", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
t_itt_yr_nc$outcome_variable_c <- gsub(pattern = "Federal Election ",
                                       replacement = "",
                                       x = t_itt_yr_nc$outcome_variable_c)
t_tot_yr_nc$outcome_variable_c <- gsub(pattern = "Federal Election ",
                                       replacement = "",
                                       x = t_tot_yr_nc$outcome_variable_c)

# Table S5: Intention-to-treat estimates by age group at the age of randomization (without baseline covariates): voting by presidential election year. 
# Presidential years: ITT NC
output_tables(t_itt_yr_nc[,var_include], file_name = "pres_year_itt_nc")

# Table S6: Treatment-on-the-treated estimates by age group at the age of randomization (without baseline covariates): voting by presidential election year. 
# Presidential years: TOT NC
output_tables(t_tot_yr_nc[,var_include], file_name = "pres_year_tot_nc")

```

# Voter participation in midterm vs. presidential election years

```{r pres-midterm}
# Clean up the labels
df_p_new$p_value_calcs <- df_p_new$analysis_type %in% c("ITT estimates", "TOT estimates",
                                            "Linear hypothesis: from ITT regression") &
  df_p_new$variable_name != "Control mean"

# Apply the Benjamini-Hochberg procedure
df_p_new$hochberg_sig <- NA
df_p_new$hochberg_sig[df_p_new$p_value_calcs & df_p_new$subgroup == "Adult"] <- 
  p.adjust(p = df_p_new$p_values[df_p_new$p_value_calcs & df_p_new$subgroup == "Adult"], method = "BH")
df_p_new$hochberg_sig[df_p_new$p_value_calcs & df_p_new$subgroup == "Age < 13"] <- 
  p.adjust(p = df_p_new$p_values[df_p_new$p_value_calcs & df_p_new$subgroup == "Age < 13"], method = "BH")
df_p_new$hochberg_sig[df_p_new$p_value_calcs & df_p_new$subgroup == "Age 13-18"] <- 
  p.adjust(p = df_p_new$p_values[df_p_new$p_value_calcs & df_p_new$subgroup == "Age 13-18"], method = "BH")

# Add stars for statistical significance
df_p_new$hochberg_sig_star <- ""
df_p_new$hochberg_sig_star[df_p_new$hochberg_sig < 0.05] <- "*"
df_p_new$hochberg_sig_star[df_p_new$hochberg_sig < 0.01] <- "**"
df_p_new$hochberg_sig_star[df_p_new$hochberg_sig < 0.001] <- "**"
# Add the stars to the tables
df_p_new$coef_se <- paste0(roundfunc(df_p_new$coefficients, 3), " (",
                     roundfunc(df_p_new$standard_errors, 3), ")", df_p_new$hochberg_sig_star)

df_p_new$variable_name[df_p_new$variable_name == "Exp" &
                         df_p_new$analysis_type == "ITT estimates"] <-
  "ITT estimate: Experimental voucher"
df_p_new$variable_name[df_p_new$variable_name == "Sec8" &
                         df_p_new$analysis_type == "ITT estimates"] <-
  "ITT estimate: Section 8"

df_p_new$variable_name[df_p_new$variable_name == "Exp" &
                         df_p_new$analysis_type == "TOT estimates"] <-
  "TOT estimate: Experimental voucher"
df_p_new$variable_name[df_p_new$variable_name == "Sec8" &
                         df_p_new$analysis_type == "TOT estimates"] <-
  "TOT estimate: Section 8"

# Variable names 
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
df_p_new$outcome_label <- factor( df_p_new$outcome_label)
df_p_new$subgroup <- factor(df_p_new$subgroup)
df_p_new_c <- df_p_new[order(df_p_new$outcome_label, df_p_new$subgroup),]

# Table S7: Intention-to-treat estimates by age group at the age of randomization (without baseline covariates): voting in midterm elections and presidential elections.
# Presidential and midterm outcomes: ITT, no covariates
output_tables(df_p_new_c[df_p_new_c$analysis_type == "ITT estimates" &
                           df_p_new_c$baseline_covariates == FALSE, var_include], file_name = "prez_midterm_itt_nc")

```

# By demographic subgroups

```{r demographic}
df_d$p_value_calcs <- df_d$analysis_type %in% c("ITT estimates") &
  df_d$variable_name != "Control mean"

df_d$hochberg_sig <- NA
df_d$hochberg_sig[df_d$p_value_calcs & df_d$subgroup == "Adult"] <- 
  p.adjust(p = df_d$p_values[df_d$p_value_calcs & df_d$subgroup == "Adult"], method = "BH")
df_d$hochberg_sig[df_d$p_value_calcs & df_d$subgroup == "Age < 13"] <- 
  p.adjust(p = df_d$p_values[df_d$p_value_calcs & df_d$subgroup == "Age < 13"], method = "BH")
df_d$hochberg_sig[df_d$p_value_calcs & df_d$subgroup == "Age 13-18"] <- 
  p.adjust(p = df_d$p_values[df_d$p_value_calcs & df_d$subgroup == "Age 13-18"], method = "BH")

df_d$hochberg_sig_star <- ""
df_d$hochberg_sig_star[df_d$hochberg_sig < 0.05] <- "*"
df_d$hochberg_sig_star[df_d$hochberg_sig < 0.01] <- "**"
df_d$hochberg_sig_star[df_d$hochberg_sig < 0.001] <- "**"

df_d$coef_se <- paste0(roundfunc(df_d$coefficients, 3), " (",
                     roundfunc(df_d$standard_errors, 3), ")", df_d$hochberg_sig_star)


df_d$outcome_label[df_d$outcome_label == "Proportion of presidential and midterm elections voted in"] <- "Proportion of federal elections voted in"
df_d_itt <- df_d[df_d$analysis_type == "ITT estimates" &
                   df_d$outcome_label %in% main_outcome,]


df_d_itt$variable_name[df_d_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_d_itt$variable_name[df_d_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"

# Table S8: Intention-to-treat estimates by demographic subgroup (gender; race/ethnicity) and age group at the age of randomization (without baseline covariates): main outcomes. 

# Main outcome: ITT, no covariates
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
output_tables(df_d_itt[,var_include], file_name = "main_outcomes_itt_nc_demographic")

```

# Analysis for only those in the L2 voter file

```{r rr-tables-l2}

# Clean up variables
# Outcome
df_rr$outcome_label <- gsub(pattern = "Voted: General Elections",
                            x = df_rr$outcome_label,
                            replacement = "Voted:")
# Experimental conditions
df_rr$group_name_c <- df_rr$variable_name
df_rr$group_name_c[df_rr$group_name_c == "Sec8"] <- "ITT estimate: Section 8"
df_rr$group_name_c[df_rr$group_name_c == "Exp"] <- "ITT estimate: Experimental voucher"
df_rr$group_name_c <- factor(df_rr$group_name_c, levels = c("Experimental voucher",
                                                      "Section 8",
                                                      "Control"))

# Results for just those in the L2 voter file
df_rr_l2 <- df_rr[grepl(df_rr$output_file_name, pattern = "L2", ignore.case = FALSE) & df_rr$group_name_c != "exp_groupExp = exp_groupSec8",]

# Fix subgroup label
df_rr_l2$subgroup <- gsub(pattern = " in voter file", replacement = "",
                          df_rr_l2$subgroup)

# Get the p-values
df_rr_l2$p_value_calcs <- df_rr_l2$analysis_type %in% c("ITT estimates") &
  df_rr_l2$variable_name != "Control mean"

# Apply the Benjamini-Hochberg procedure
df_rr_l2$hochberg_sig <- NA
df_rr_l2$hochberg_sig[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Adult"] <- 
  p.adjust(p = df_rr_l2$p_values[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Adult"], method = "BH")
df_rr_l2$hochberg_sig[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Age < 13"] <- 
  p.adjust(p = df_rr_l2$p_values[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Age < 13"], method = "BH")
df_rr_l2$hochberg_sig[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Age 13-18"] <- 
  p.adjust(p = df_rr_l2$p_values[df_rr_l2$p_value_calcs & df_rr_l2$subgroup == "Age 13-18"], method = "BH")

# Add stars for statistical significance 
df_rr_l2$hochberg_sig_star <- ""
df_rr_l2$hochberg_sig_star[df_rr_l2$hochberg_sig < 0.05] <- "*"
df_rr_l2$hochberg_sig_star[df_rr_l2$hochberg_sig < 0.01] <- "**"
df_rr_l2$hochberg_sig_star[df_rr_l2$hochberg_sig < 0.001] <- "**"

df_rr_l2$coef_se <- paste0(roundfunc(df_rr_l2$coefficients, 3), " (",
                     roundfunc(df_rr_l2$standard_errors, 3), ")", df_rr_l2$hochberg_sig_star)

# Clean up labels
df_rr_l2$outcome_label[df_rr_l2$outcome_label == "Proportion of presidential and midterm elections voted in"] <- "Proportion of federal elections voted in"
df_rr_l2_itt <- df_rr_l2[df_rr_l2$analysis_type == "ITT estimates" &
                   df_rr_l2$outcome_label %in% main_outcome,]


df_rr_l2_itt$variable_name[df_rr_l2_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_rr_l2_itt$variable_name[df_rr_l2_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"

# Table S15: Intention-to-treat estimates on voting outcomes by age group at the age of randomization (without baseline covariates) for participants linked to the L2 Voter File
# Main outcome: ITT, no covariates
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
output_tables(df_rr_l2[,var_include], file_name = "main_outcomes_itt_nc_l2")

```

# Households that are 4 members or bigger

```{r household4}
# Select households that are 4 members or bigger
df_rr_hh4 <- df_rr[grepl(df_rr$output_file_name, pattern = "4more", ignore.case = FALSE) & df_rr$group_name_c != "exp_groupExp = exp_groupSec8",]

# Fix subgroup label
df_rr_hh4$subgroup <- gsub(pattern = " in household size 4 or greater| in households size 4 or greater", replacement = "",
                          df_rr_hh4$subgroup)

# Get the p-values
df_rr_hh4$p_value_calcs <- df_rr_hh4$analysis_type %in% c("ITT estimates") &
  df_rr_hh4$variable_name != "Control mean"

# Apply the Benjamini-Hochberg procedure
df_rr_hh4$hochberg_sig <- NA
df_rr_hh4$hochberg_sig[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Adult"] <- 
  p.adjust(p = df_rr_hh4$p_values[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Adult"], method = "BH")
df_rr_hh4$hochberg_sig[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Age < 13"] <- 
  p.adjust(p = df_rr_hh4$p_values[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Age < 13"], method = "BH")
df_rr_hh4$hochberg_sig[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Age 13-18"] <- 
  p.adjust(p = df_rr_hh4$p_values[df_rr_hh4$p_value_calcs & df_rr_hh4$subgroup == "Age 13-18"], method = "BH")

# Add stars for signigifance 
df_rr_hh4$hochberg_sig_star <- ""
df_rr_hh4$hochberg_sig_star[df_rr_hh4$hochberg_sig < 0.05] <- "*"
df_rr_hh4$hochberg_sig_star[df_rr_hh4$hochberg_sig < 0.01] <- "**"
df_rr_hh4$hochberg_sig_star[df_rr_hh4$hochberg_sig < 0.001] <- "**"

df_rr_hh4$coef_se <- paste0(roundfunc(df_rr_hh4$coefficients, 3), " (",
                     roundfunc(df_rr_hh4$standard_errors, 3), ")", df_rr_hh4$hochberg_sig_star)

# Clean up labels
df_rr_hh4$outcome_label[df_rr_hh4$outcome_label == "Proportion of presidential and midterm elections voted in"] <- "Proportion of federal elections voted in"
df_rr_hh4_itt <- df_rr_hh4

df_rr_hh4_itt$variable_name[df_rr_hh4_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_rr_hh4_itt$variable_name[df_rr_hh4_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"

# Main outcome: ITT, no covariates
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
output_tables(df_rr_hh4_itt[,var_include], file_name = "main_outcomes_itt_nc_hh4")
```

# Whether the MTO experiment affected presidential election margins

```{r margins}
# Presidential election margins
df_rr_pm <- df_rr[grepl(df_rr$outcome_label, pattern = "Presidential Election Margins") & df_rr$variable_name != "exp_groupExp = exp_groupSec8",]

# Get the p-values
df_rr_pm$p_value_calcs <- df_rr_pm$analysis_type %in% c("ITT estimates") &
  df_rr_pm$variable_name != "Control mean"

# Apply the Benjamini-Hochberg procedure
df_rr_pm$hochberg_sig <- NA
df_rr_pm$hochberg_sig[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Adult"] <- 
  p.adjust(p = df_rr_pm$p_values[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Adult"], method = "BH")
df_rr_pm$hochberg_sig[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Age < 13"] <- 
  p.adjust(p = df_rr_pm$p_values[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Age < 13"], method = "BH")
df_rr_pm$hochberg_sig[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Age 13-18"] <- 
  p.adjust(p = df_rr_pm$p_values[df_rr_pm$p_value_calcs & df_rr_pm$subgroup == "Age 13-18"], method = "BH")

# Add stars for statistical significance 
df_rr_pm$hochberg_sig_star <- ""
df_rr_pm$hochberg_sig_star[df_rr_pm$hochberg_sig < 0.05] <- "*"
df_rr_pm$hochberg_sig_star[df_rr_pm$hochberg_sig < 0.01] <- "**"
df_rr_pm$hochberg_sig_star[df_rr_pm$hochberg_sig < 0.001] <- "**"

df_rr_pm$coef_se <- paste0(roundfunc(df_rr_pm$coefficients, 3), " (",
                     roundfunc(df_rr_pm$standard_errors, 3), ")", df_rr_pm$hochberg_sig_star)

# Clean up labels
df_rr_pm$outcome_label[df_rr_pm$outcome_label == "Proportion of presidential and midterm elections voted in"] <- "Proportion of federal elections voted in"
df_rr_pm_itt <- df_rr_pm

df_rr_pm_itt$variable_name[df_rr_pm_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_rr_pm_itt$variable_name[df_rr_pm_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"

# Table S17: Intention-to-treat estimates on participants’ state presidential election margins (percentage of votes for Republican candidate – percentage of votes for Democratic candidate), estimated without baseline covariates for participants who were adults at the time of randomization
# Main outcome: ITT, no covariates
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
output_tables(df_rr_pm_itt[,var_include], file_name = "main_outcomes_itt_nc_pm")
```

# Results by whether head of household has driver's license or not

```{r license}
# Drivers license
df_rr_license <- df_rr[grepl(df_rr$output_file_name, pattern = "license") & df_rr$variable_name != "exp_groupExp = exp_groupSec8",]
df_rr_license <- df_rr_license[df_rr_license$outcome_variable %in%
                                 c("in_voter_file", "General_prop"),]

# Get the p-values
df_rr_license$p_value_calcs <- df_rr_license$analysis_type %in% c("ITT estimates") &
  df_rr_license$variable_name != "Control mean"

# Apply the Benjamini-Hochberg procedure
df_rr_license$hochberg_sig <- NA
df_rr_license$hochberg_sig[df_rr_license$p_value_calcs] <- 
  p.adjust(p = df_rr_license$p_values[df_rr_license$p_value_calcs], method = "BH")

# Add stars for significance 
df_rr_license$hochberg_sig_star <- ""
df_rr_license$hochberg_sig_star[df_rr_license$hochberg_sig < 0.05] <- "*"
df_rr_license$hochberg_sig_star[df_rr_license$hochberg_sig < 0.01] <- "**"
df_rr_license$hochberg_sig_star[df_rr_license$hochberg_sig < 0.001] <- "**"

df_rr_license$coef_se <- paste0(roundfunc(df_rr_license$coefficients, 3), " (",
                     roundfunc(df_rr_license$standard_errors, 3), ")", df_rr_license$hochberg_sig_star)

# Clean up the labels
df_rr_license$outcome_label[df_rr_license$outcome_label == "Proportion of presidential and midterm elections voted in"] <- "Proportion of federal elections voted in"
df_rr_license_itt <- df_rr_license

df_rr_license_itt$variable_name[df_rr_license_itt$variable_name == "Exp"] <- "ITT estimate: Experimental voucher"
df_rr_license_itt$variable_name[df_rr_license_itt$variable_name == "Sec8"] <- "ITT estimate: Section 8"

# Table S18: Intention-to-treat estimates for heads of households by driver’s license status (without baseline covariates): main outcomes.
# Main outcome: ITT, no covariates
var_include <- c("outcome_label", "subgroup", "variable_name",
                 "coef_se", "N", "N_clusters")
output_tables(df_rr_license_itt[,var_include], file_name = "main_outcomes_itt_nc_license")

```